1   /*
2    * Copyright (C) 2008 The Guava Authors
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    * http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   */
16  
17  package com.google.thirdparty.publicsuffix;
18  
19  import com.google.common.annotations.GwtCompatible;
20  import com.google.common.base.Joiner;
21  import com.google.common.collect.ImmutableMap;
22  import com.google.common.collect.Lists;
23  
24  import java.util.List;
25  
26  /**
27   * Parser for a map of reversed domain names stored as a serialized radix tree.
28   */
29  @GwtCompatible
30  class TrieParser {
31  
32    private static final Joiner PREFIX_JOINER = Joiner.on("");
33  
34    /**
35     * Parses a serialized trie representation of a map of reversed public
36     * suffixes into an immutable map of public suffixes.
37     */
38    static ImmutableMap<String, PublicSuffixType> parseTrie(CharSequence encoded) {
39      ImmutableMap.Builder<String, PublicSuffixType> builder = ImmutableMap.builder();
40      int encodedLen = encoded.length();
41      int idx = 0;
42      while (idx < encodedLen) {
43        idx += doParseTrieToBuilder(
44            Lists.<CharSequence>newLinkedList(),
45            encoded.subSequence(idx, encodedLen),
46            builder);
47      }
48      return builder.build();
49    }
50  
51    /**
52     * Parses a trie node and returns the number of characters consumed.
53     *
54     * @param stack The prefixes that preceed the characters represented by this
55     *     node. Each entry of the stack is in reverse order.
56     * @param encoded The serialized trie.
57     * @param builder A map builder to which all entries will be added.
58     * @return The number of characters consumed from {@code encoded}.
59     */
60    private static int doParseTrieToBuilder(
61        List<CharSequence> stack,
62        CharSequence encoded,
63        ImmutableMap.Builder<String, PublicSuffixType> builder) {
64  
65      int encodedLen = encoded.length();
66      int idx = 0;
67      char c = '\0';
68  
69      // Read all of the characters for this node.
70      for ( ; idx < encodedLen; idx++) {
71        c = encoded.charAt(idx);
72        if (c == '&' || c == '?' || c == '!' || c == ':' || c == ',') {
73          break;
74        }
75      }
76  
77      stack.add(0, reverse(encoded.subSequence(0, idx)));
78  
79      if (c == '!' || c == '?' || c == ':' || c == ',') {
80        // '!' represents an interior node that represents an ICANN entry in the map.
81        // '?' represents a leaf node, which represents an ICANN entry in map.
82        // ':' represents an interior node that represents a private entry in the map
83        // ',' represents a leaf node, which represents a private entry in the map.
84        String domain = PREFIX_JOINER.join(stack);
85        if (domain.length() > 0) {
86          builder.put(domain, PublicSuffixType.fromCode(c));
87        }
88      }
89      idx++;
90  
91      if (c != '?' && c != ',') {
92        while (idx < encodedLen) {
93          // Read all the children
94          idx += doParseTrieToBuilder(stack, encoded.subSequence(idx, encodedLen), builder);
95          if (encoded.charAt(idx) == '?' || encoded.charAt(idx) == ',') {
96            // An extra '?' or ',' after a child node indicates the end of all children of this node.
97            idx++;
98            break;
99          }
100       }
101     }
102     stack.remove(0);
103     return idx;
104   }
105 
106   /**
107    * Reverses a character sequence. This is borrowed from
108    * https://code.google.com/p/google-web-toolkit/source/detail?r=11591#
109    * and can be replaced with a simple {@code StringBuffer#reverse} once GWT 2.6 is available.
110    */
111   private static CharSequence reverse(CharSequence s) {
112     int length = s.length();
113     if (length <= 1) {
114       return s;
115     }
116 
117     char[] buffer = new char[length];
118     buffer[0] = s.charAt(length - 1);
119 
120     for (int i = 1; i < length; i++) {
121       buffer[i] = s.charAt(length - 1 - i);
122       if (Character.isSurrogatePair(buffer[i], buffer[i - 1])) {
123         swap(buffer, i - 1, i);
124       }
125     }
126 
127     return new String(buffer);
128   }
129 
130   private static void swap(char[] buffer, int f, int s) {
131     char tmp = buffer[f];
132     buffer[f] = buffer[s];
133     buffer[s] = tmp;
134   }
135 }